import urllib.request as ur
import lxml.etree as le
import user_agent

# Build the HTTP request for a web page, send it, and return the response body
def getResquest(url):
    """Fetch *url* over HTTP(S) and return the raw response body.

    The (misspelled) function name is kept as-is so existing callers keep
    working.

    Args:
        url: Address of the page to download.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request cannot be completed
            (``HTTPError`` is a subclass and is raised for HTTP error codes).
    """
    request = ur.Request(
        url=url,
        headers={
            # The header must be spelled 'User-Agent'; the previous key
            # 'Use-Agent' was ignored by servers, so urllib's default
            # "Python-urllib/x.y" agent was what actually got sent.
            # NOTE(review): get_user_agent_pc() presumably returns a
            # desktop-browser User-Agent string -- confirm in user_agent.
            'User-Agent': user_agent.get_user_agent_pc(),
        },
    )
    # Use the response as a context manager so the underlying connection is
    # closed even if read() raises.
    with ur.urlopen(request) as response:
        return response.read()




if __name__ == '__main__':
    # Step 1: download the HTML of the CSDN Academy home page and save it to
    # a local file. A failure here is only reported; step 2 below still runs
    # against whatever csdn.html is present on disk.
    try:
        response = getResquest(
            url='https://edu.csdn.net/'
        )
        with open('csdn.html', 'wb') as f:
            f.write(response)
    except Exception as e:
        print(e)

    # Step 2: extract the course categories from the saved page.
    try:
        # Only the read needs the file handle; close it before parsing.
        with open('csdn.html', 'r', encoding='utf-8') as f:
            html = f.read()

        # Parse the HTML so it can be queried with XPath.
        html_x = le.HTML(html)

        # Each matching <div> holds one first-level category together with
        # its second-level categories.
        data_s = []
        for div_x in html_x.xpath('//div[@class="classify_cList"]'):
            titles = div_x.xpath('./h3/a/text()')
            if not titles:
                # A block without a heading link used to raise IndexError,
                # which discarded every category; skip just that block.
                continue
            data_s.append(
                dict(
                    category1=titles[0],
                    category2_s=div_x.xpath('./div/span/a/text()'),
                )
            )

        # Print each first-level category followed by its indented
        # second-level categories.
        for data in data_s:
            print(data['category1'])
            for category2 in data['category2_s']:
                print('     ', category2)
    except Exception as e:
        print(e)
